Load Data

The original dataset is in gexf format, we need to read it and convert it to igraph.

# read network data
day1 <- read.gexf('sp_data_school_day_1_g.gexf')
day1 <- gexf.to.igraph(day1)
day2 <- read.gexf('sp_data_school_day_2_g.gexf')
day2 <- gexf.to.igraph(day2)
# merge two networks
contact <- igraph::union(day1, day2, byname = 'auto')
# read and clean attribute data
attribute <- read.table('metadata.txt', header = FALSE)
attribute$type <- ifelse(attribute$V2 == 'Teachers', 'teacher', 'student')
attribute$grade <- ifelse(attribute$type =='teacher', 'N/A', substr(attribute$V2, 1, 1))
attribute$gender <- ifelse(attribute$type =='teacher', 'N/A', as.character(attribute$V3))
attribute <- subset(attribute, select = -c(V3))
colnames(attribute) <- c('name', 'class', 'type', 'grade', 'gender')
num <- cbind(V(contact), get.vertex.attribute(contact, 'name', index = V(contact)))
num <- as.data.frame(num)
colnames(num) <- c('id', 'name')
num$name <- as.integer(as.character(num$name))
attribute$id <- left_join(attribute, num, by = 'name')$id
attribute$id <- as.integer(as.character(attribute$id))
attribute <- arrange(attribute, id)
attriname <- colnames(attribute)[2:5]
head(attribute)
##   name class    type grade gender id
## 1 1789    1A student     1      M  1
## 2 1780    3A student     3      M  2
## 3 1782    3A student     3      M  3
## 4 1783    1A student     1      M  4
## 5 1787    1A student     1      F  5
## 6 1546    4A student     4      F  6
# data exploration
sqldf('SELECT type, COUNT(*) FROM attribute GROUP BY type')
##      type COUNT(*)
## 1 student      232
## 2 teacher       10
sqldf('SELECT grade, COUNT(*) FROM attribute WHERE grade != "N/A" GROUP BY grade')
##   grade COUNT(*)
## 1     1       48
## 2     2       49
## 3     3       45
## 4     4       44
## 5     5       46
sqldf('SELECT gender, COUNT(*) FROM attribute WHERE gender != "N/A" GROUP BY gender')
##    gender COUNT(*)
## 1       F      112
## 2       M      115
## 3 Unknown        5

Network Formation

# set vertex and edge attributes
for (i in V(contact)){
  vname <- get.vertex.attribute(contact, 'name', index = i)
  for (j in attriname){
    vattri <- subset(attribute, name == vname)[,j]
    contact <- set.vertex.attribute(contact, j, index=i, vattri)
  }
}
for (i in E(contact)){
  weight1 <- get.edge.attribute(contact, 'weight_1', index = i)
  weight2 <- get.edge.attribute(contact, 'weight_2', index = i)
  weight1 <- ifelse(is.na(weight1), 0, 1)
  weight2 <- ifelse(is.na(weight2), 0, 1)
  contact <- set.edge.attribute(contact, 'weight', index = i, weight1 + weight2)
  start = get.vertex.attribute(contact, 'type', index = ends(contact, i)[1])
  end = get.vertex.attribute(contact, 'type', index = ends(contact, i)[2])
  contact <- set.edge.attribute(contact, 'same.type', index = i, as.integer(start == end))
}
contact <- delete_edge_attr(contact, 'weight_1')
contact <- delete_edge_attr(contact, 'weight_2')
summary(contact)
## IGRAPH b0cfa90 UNWB 242 8316 -- 
## + attr: name (v/c), class (v/n), type (v/c), grade (v/c), gender
## | (v/c), weight (e/n), same.type (e/n)
# create a network based on strong relation (weight = 2)
contact_strong <- delete.edges(contact, 
                               E(contact)[get.edge.attribute(contact,
                                                             name = "weight") < 2])
summary(contact_strong)
## IGRAPH de7f734 UNWB 242 3122 -- 
## + attr: name (v/c), class (v/n), type (v/c), grade (v/c), gender
## | (v/c), weight (e/n), same.type (e/n)

Network Metrics Analysis

Initial Exploration

# basic plot
# plot contact network
type_color = get.vertex.attribute(contact, 'type')
colors = c("#FF0000FF", "#FFFF00FF")
type_color[type_color == 'teacher'] = colors[1]
type_color[type_color == 'student'] = colors[2]
layout <- layout_nicely(contact, dim = 2)
plot(contact, 
     layout=layout, 
     vertex.color=type_color, 
     vertex.label=NA, 
     edge.arrow.size=.2,
     vertex.size=5)
legend(x=-1.5, y=-1.1, c("Teacher","Student"), pch=21,
       col="#777777", pt.bg=colors, pt.cex=2, cex=.8, bty="n", ncol=1)

# plot strong contact network
plot(contact_strong, 
     layout=layout, 
     vertex.color=type_color, 
     vertex.label=NA, 
     edge.arrow.size=.2,
     vertex.size=5)
legend(x=-1.5, y=-1.1, c("Teacher","Student"), pch=21,
       col="#777777", pt.bg=colors, pt.cex=2, cex=.8, bty="n", ncol=1)

Degree Distribution

# degree
deg <- degree(contact) 
deg_strong <- degree(contact_strong)
mean(deg)
## [1] 68.72727
mean(deg_strong)
## [1] 25.80165
deg <- data.frame(type = get.vertex.attribute(contact, 'type'), 
                  gender = get.vertex.attribute(contact, 'gender'),
                  grade = get.vertex.attribute(contact, 'grade'),
                  deg = deg)
deg_strong <- data.frame(type = get.vertex.attribute(contact_strong, 'type'), 
                         gender = get.vertex.attribute(contact_strong, 'gender'),
                         grade = get.vertex.attribute(contact_strong, 'grade'),
                         deg = deg_strong)
# degree distribution
deg.dist = degree_distribution(contact, cumulative=F, mode="all")
plot( x=0:(length(deg.dist)-1), y=deg.dist, pch=19, cex=1, col="orange", 
      xlab="Degree", ylab="Frequency")

deg.strong.dist = degree_distribution(contact_strong, cumulative=F, mode="all")
plot( x=0:(length(deg.strong.dist)-1), y=deg.strong.dist, pch=19, cex=1, col="orange", 
      xlab="Degree", ylab="Frequency")

# degree by type
ggplot(data = deg) + geom_histogram(aes(x = deg, fill = type),bins = 15) + 
  facet_grid(type ~ .)

ggplot(data = deg_strong) + geom_histogram(aes(x = deg, fill = type),bins = 15) + 
  facet_grid(type ~ .)

# degree by gender
ggplot(data = subset(deg, gender != 'N/A' & gender != 'Unknown')) + 
  geom_histogram(aes(x = deg, fill = gender),bins = 15) + facet_grid(gender ~ .)

ggplot(data = subset(deg_strong, gender != 'N/A' & gender != 'Unknown')) + 
  geom_histogram(aes(x = deg, fill = gender),bins = 15) + facet_grid(gender ~ .)

# degree by grade
ggplot(data = subset(deg, grade != 'N/A')) + 
  geom_histogram(aes(x = deg, fill = grade),bins = 15) + facet_grid(grade ~ .)

ggplot(data = subset(deg_strong, grade != 'N/A')) + 
  geom_histogram(aes(x = deg, fill = grade),bins = 15) + facet_grid(grade ~ .)

Basic Statistics

reachability <- function(g) {
  reach_mat = matrix(nrow = vcount(g), 
                     ncol = vcount(g))
  for (i in 1:vcount(g)) {
    reach_mat[i,] = 0
    this_node_reach <- subcomponent(g, i)
    
    for (j in 1:(length(this_node_reach))) {
      alter = this_node_reach[j]
      reach_mat[i, alter] = 1
    }
  }
  return(reach_mat)
}

reach <- reachability(contact)
reach_strong <- reachability(contact_strong)
mean(reach)
## [1] 1
mean(reach_strong)
## [1] 0.9192337
sp <- shortest.paths(contact)
sp_strong <- shortest.paths(contact_strong)
mean(sp[which(sp != Inf)])
## [1] 1.850488
mean(sp_strong[which(sp_strong != Inf)])
## [1] 4.678307
# density
graph.density(contact)
## [1] 0.2851754
graph.density(contact_strong)
## [1] 0.1070608
# transitivity
transitivity(contact)
## [1] 0.479764
transitivity(contact_strong)
## [1] 0.5492348
# diameter
diameter(contact)
## [1] 3
diameter(contact_strong)
## [1] 10

Centrality Analysis

# closeness centrality
clo <- closeness(contact)
V(contact)$size <- (clo*600)^5
grade_color <- get.vertex.attribute(contact, 'grade')
colors <- rainbow(6)
grade_color[grade_color == 'N/A'] = colors[1]
grade_color[grade_color == '1'] = colors[2]
grade_color[grade_color == '2'] = colors[3]
grade_color[grade_color == '3'] = colors[4]
grade_color[grade_color == '4'] = colors[5]
grade_color[grade_color == '5'] = colors[6]
plot(contact, 
     layout=layout, 
     vertex.color=grade_color, 
     vertex.label=NA, 
     edge.arrow.size=.2)
legend(x=-1.5, y=-1.1, c("Teacher", "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5"), 
       pch=21, col="#777777", pt.bg=colors, pt.cex=2, cex=.8, bty="n", ncol=1)

clo_strong <- closeness(contact_strong)
V(contact_strong)$size <- (clo_strong*5000)^5
plot(contact_strong, 
     layout=layout, 
     vertex.color=grade_color, 
     vertex.label=NA, 
     edge.arrow.size=.2)
legend(x=-1.5, y=-1.1, c("Teacher", "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5"), 
       pch=21, col="#777777", pt.bg=colors, pt.cex=2, cex=.8, bty="n", ncol=1)

# betweeness centrality
bet <- betweenness(contact)
V(contact)$size <- 0.8*bet^(0.4)
plot(contact, 
     layout=layout, 
     vertex.color=grade_color, 
     vertex.label=NA, 
     edge.arrow.size=.2)
legend(x=-1.5, y=-1.1, c("Teacher", "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5"), 
       pch=21, col="#777777", pt.bg=colors, pt.cex=2, cex=.8, bty="n", ncol=1)

bet_strong <- betweenness(contact_strong)
V(contact_strong)$size <- 0.8*bet_strong^(0.4)
plot(contact_strong, 
     layout=layout, 
     vertex.color=grade_color, 
     vertex.label=NA, 
     edge.arrow.size=.2)
legend(x=-1.5, y=-1.1, c("Teacher", "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5"), 
       pch=21, col="#777777", pt.bg=colors, pt.cex=2, cex=.8, bty="n", ncol=1)

# Eigenvector centrality
eig <- eigen_centrality(contact)
V(contact)$size <- (10*eig$vector)
plot(contact, 
     layout=layout, 
     vertex.color=grade_color, 
     vertex.label=NA, 
     edge.arrow.size=.2)
legend(x=-1.5, y=-1.1, c("Teacher", "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5"), 
       pch=21, col="#777777", pt.bg=colors, pt.cex=2, cex=.8, bty="n", ncol=1)

eig_strong <- eigen_centrality(contact_strong)
V(contact_strong)$size <- (15*eig_strong$vector)
plot(contact_strong, 
     layout=layout, 
     vertex.color=grade_color, 
     vertex.label=NA, 
     edge.arrow.size=.2)
legend(x=-1.5, y=-1.1, c("Teacher", "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5"), 
       pch=21, col="#777777", pt.bg=colors, pt.cex=2, cex=.8, bty="n", ncol=1)

# analysis
centrality <- data.frame(id = attribute$id, degree = deg$deg, type = attribute$type, 
                         close = clo, between = bet, eigen = eig$vector)
ggplot(centrality) + geom_point(aes(x = between, y = close, color = type)) +
  labs(x = "Betweenness centrality", y = "Close Centrality")

ggplot(centrality) + geom_point(aes(x = close, y =eigen, color = type)) +
  labs(x = "Close Centrality", y = "Eigenvector Centrality")

ggplot(centrality) + geom_point(aes(x = between, y = eigen, color = type)) +
  labs(x = "Betweenness centrality", y = "Eigenvector Centrality")

cor(select(centrality, degree, close, between, eigen))
##            degree     close   between     eigen
## degree  1.0000000 0.9421107 0.8875039 0.9711950
## close   0.9421107 1.0000000 0.8615883 0.8745694
## between 0.8875039 0.8615883 1.0000000 0.8091362
## eigen   0.9711950 0.8745694 0.8091362 1.0000000
centrality_strong <- data.frame(degree=deg_strong$deg, close = clo_strong, 
                         between = bet_strong, eigen = eig_strong$vector)
cor(centrality_strong)
##            degree     close   between     eigen
## degree  1.0000000 0.6513993 0.7624974 0.8123224
## close   0.6513993 1.0000000 0.2832208 0.4115593
## between 0.7624974 0.2832208 1.0000000 0.5225747
## eigen   0.8123224 0.4115593 0.5225747 1.0000000

Teacher-student Communication Analysis

# here we only consider the contact between students and teachers
dif.type <- delete.edges(contact, E(contact)[get.edge.attribute(contact, name = "same.type")==1])
summary(dif.type)
## IGRAPH c66f729 UNWB 242 432 -- 
## + attr: name (v/c), class (v/n), type (v/c), grade (v/c), gender
## | (v/c), size (v/n), weight (e/n), same.type (e/n)
V(dif.type)$type <- bipartite_mapping(dif.type)$type
V(dif.type)$color <- ifelse(V(dif.type)$type, "salmon", "yellow")
V(dif.type)$shape <- ifelse(V(dif.type)$type, "square", "circle")
V(dif.type)$label <- ifelse(V(dif.type)$type, get.vertex.attribute(dif.type, 'name'), NA)
plot(dif.type, vertex.label.cex = 0.6, vertex.label.color = 'black',
     layout = layout.bipartite, vertex.size=8)
## Warning in v(graph): vertex types converted to logical
legend(x=-1.5, y=-1.1, c("Teacher","Student"), pch=c(22, 21),
       col="#777777", pt.bg=c("salmon", "yellow"), pt.cex=2, cex=.8, bty="n", ncol=1)

grade1 <- delete.vertices(dif.type, 
                          V(dif.type)[get.vertex.attribute(dif.type, name = 'grade')!=1 & get.vertex.attribute(dif.type, name = 'grade')!='N/A'])
plot(grade1, vertex.label.cex = 0.6, vertex.label.color = 'black',
     layout = layout.bipartite, vertex.size=8)
## Warning in v(graph): vertex types converted to logical
legend(x=-1.5, y=-1.1, c("Teacher", "Student in Grade 1"), pch=c(22, 21),
       col="#777777", pt.bg=c("salmon", "yellow"), pt.cex=2, cex=.8, bty="n", ncol=1)

# teacher 1745 and 1753 are responsible for students in grade 1
grade2 <- delete.vertices(dif.type, 
                          V(dif.type)[get.vertex.attribute(dif.type, name = 'grade')!=2 & get.vertex.attribute(dif.type, name = 'grade')!='N/A'])
plot(grade2,  vertex.label.cex = 0.6, vertex.label.color = 'black',
    layout = layout.bipartite, vertex.size=8)
## Warning in v(graph): vertex types converted to logical
legend(x=-1.5, y=-1.1, c("Teacher", "Student in Grade 2"), pch=c(22, 21),
       col="#777777", pt.bg=c("salmon", "yellow"), pt.cex=2, cex=.8, bty="n", ncol=1)

# teacher 1650 and 1852 are responsible for students in grade 2
grade3 <- delete.vertices(dif.type, 
                          V(dif.type)[get.vertex.attribute(dif.type, name = 'grade')!=3 & get.vertex.attribute(dif.type, name = 'grade')!='N/A'])
plot(grade3, vertex.label.cex = 0.6, vertex.label.color = 'black',
     layout = layout.bipartite, vertex.size=8)
## Warning in v(graph): vertex types converted to logical
legend(x=-1.5, y=-1.1, c("Teacher", "Student in Grade 3"), pch=c(22, 21),
       col="#777777", pt.bg=c("salmon", "yellow"), pt.cex=2, cex=.8, bty="n", ncol=1)

# teacher 1709 and 1746 are responsible for students in grade 3
grade4 <- delete.vertices(dif.type, 
                          V(dif.type)[get.vertex.attribute(dif.type, name = 'grade')!=4 & get.vertex.attribute(dif.type, name = 'grade')!='N/A'])
plot(grade4,vertex.label.cex = 0.6, vertex.label.color = 'black',
     layout = layout.bipartite, vertex.size=8)
## Warning in v(graph): vertex types converted to logical
legend(x=-1.5, y=-1.1, c("Teacher", "Student in Grade 4"), pch=c(22, 21),
       col="#777777", pt.bg=c("salmon", "yellow"), pt.cex=2, cex=.8, bty="n", ncol=1)

# teacher 1521 and 1653 are responsible for students in grade 4
grade5 <- delete.vertices(dif.type, 
                          V(dif.type)[get.vertex.attribute(dif.type, name = 'grade')!=5 & get.vertex.attribute(dif.type, name = 'grade')!='N/A'])
plot(grade3, vertex.label.cex = 0.6, vertex.label.color = 'black',
     layout = layout.bipartite, vertex.size=8)
## Warning in v(graph): vertex types converted to logical
legend(x=-1.5, y=-1.1, c("Teacher", "Student in Grade 5"), pch=c(22, 21),
       col="#777777", pt.bg=c("salmon", "yellow"), pt.cex=2, cex=.8, bty="n", ncol=1)

# teacher 1709 and 1746 are responsible for students in grade 5

# degree distribution analysis
deg <- degree(dif.type)
deg <- data.frame(deg = deg, type = get.vertex.attribute(dif.type, 'type'), 
                  name = get.vertex.attribute(dif.type, 'name'))
deg.tea <- subset(deg, type == 'TRUE', select = c(name, deg))
mean(deg.tea$deg)
## [1] 43.2
deg.stu <- subset(deg, type == 'FALSE', select = c(name, deg))
mean(deg.stu$deg)
## [1] 1.862069
arrange(deg.tea, -deg)
##    name deg
## 1  1852  54
## 2  1709  53
## 3  1650  49
## 4  1745  49
## 5  1746  46
## 6  1668  44
## 7  1653  39
## 8  1753  34
## 9  1521  33
## 10 1824  31
head(arrange(deg.stu, deg))
##   name deg
## 1 1775   0
## 2 1766   0
## 3 1799   0
## 4 1774   1
## 5 1836   1
## 6 1770   1

Block Model Analysis

Class Based Model

# create same class network
same.class <- matrix(0, 242, 242)
for (i in 1:242){
  for (j in 1:242){
    same.class[i,j] = (subset(attribute, id == i)$class == 
                         subset(attribute, id == j)$class)
  }
}
same.class <- graph.adjacency(same.class, mode = "undirected")
# same class block model
contact.sna <- asNetwork(contact)
contact.strong.sna <- asNetwork(contact_strong)
same.class.sna <- asNetwork(same.class)
# using same class network to build block model
eq <- equiv.clust(same.class.sna, mode="digraph")
# block model of contact network
b <- blockmodel(contact.sna, eq, k=11)
classgroup <- data.frame(name = b$plabels, block = b$block.membership)
classgroup$name <- as.integer(as.character(classgroup$name))
classgroup$block <- as.integer(as.character(classgroup$block))
classgroup <- sqldf("SELECT MIN(name) AS id, block FROM classgroup GROUP BY block")
classgroup$class <- left_join(classgroup, attribute, by = 'id')$class

bimage <- b$block.model
colnames(bimage) <- classgroup$class
rownames(bimage) <- classgroup$class
m.bimage <- melt(bimage)
m.bimage$Var1 <- as.character(m.bimage$Var1)
m.bimage$Var2 <- as.character(m.bimage$Var2)
ggplot(m.bimage, aes(Var1, Var2)) + 
  geom_tile(aes(fill = value),colour = "white") + 
  scale_fill_gradient(low = "white",high = "red") +
  labs(x="", y="")

den <- graph.density(contact)
bimage[bimage < den] <- 0
m.bimage <- melt(bimage)
m.bimage$Var1 <- as.character(m.bimage$Var1)
m.bimage$Var2 <- as.character(m.bimage$Var2)
ggplot(m.bimage, aes(Var1, Var2)) + 
  geom_tile(aes(fill = value),colour = "white") + 
  scale_fill_gradient(low = "white",high = "red") +
  labs(x="", y="")

set.seed(1000)
gplot(bimage, diag = TRUE,
      label = colnames(bimage),
      mode = "fruchtermanreingold") 

# block model of strong contact network
b <- blockmodel(contact.strong.sna, eq, k=11)
classgroup <- data.frame(name = b$plabels, block = b$block.membership)
classgroup$name <- as.integer(as.character(classgroup$name))
classgroup$block <- as.integer(as.character(classgroup$block))
classgroup <- sqldf("SELECT MIN(name) AS id, block FROM classgroup GROUP BY block")
classgroup$class <- left_join(classgroup, attribute, by = 'id')$class

bimage <- b$block.model
colnames(bimage) <- classgroup$class
rownames(bimage) <- classgroup$class
m.bimage <- melt(bimage)
m.bimage$Var1 <- as.character(m.bimage$Var1)
m.bimage$Var2 <- as.character(m.bimage$Var2)
ggplot(m.bimage, aes(Var1, Var2)) + 
  geom_tile(aes(fill = value),colour = "white") + 
  scale_fill_gradient(low = "white",high = "red") +
  labs(x="", y="")

den <- graph.density(contact)
bimage[bimage < den] <- 0
m.bimage <- melt(bimage)
m.bimage$Var1 <- as.character(m.bimage$Var1)
m.bimage$Var2 <- as.character(m.bimage$Var2)
ggplot(m.bimage, aes(Var1, Var2)) + 
  geom_tile(aes(fill = value),colour = "white") + 
  scale_fill_gradient(low = "white",high = "red") +
  labs(x="", y="")

set.seed(1000)
gplot(bimage, diag = TRUE,
      label = colnames(bimage),
      mode = "fruchtermanreingold")

Gender Based Model

# same gender network
same.gender <- matrix(0, 242, 242)
for (i in 1:242){
  for (j in 1:242){
    same.gender[i,j] = (subset(attribute, id == i)$gender == 
                         subset(attribute, id == j)$gender)
  }
}
same.gender <- graph.adjacency(same.gender, mode = "undirected")# same class block model
same.gender.sna <- asNetwork(same.gender)
# using same gender network to build block model
eq <- equiv.clust(same.gender.sna, mode="digraph")
# block model of contact network
b <- blockmodel(contact.sna, eq, k=4)
gendergroup <- data.frame(name = b$plabels, block = b$block.membership)
gendergroup$name <- as.integer(as.character(gendergroup$name))
gendergroup$block <- as.integer(as.character(gendergroup$block))
gendergroup <- sqldf("SELECT MIN(name) AS id, block FROM gendergroup GROUP BY block")
gendergroup$id <- as.integer(gendergroup$id)
gendergroup$gender <- left_join(gendergroup, attribute, by = 'id')$gender

bimage <- b$block.model
colnames(bimage) <- gendergroup$gender
rownames(bimage) <- gendergroup$gender
bimage <- bimage[1:2, 1:2]
m.bimage <- melt(bimage)
m.bimage$Var1 <- as.character(m.bimage$Var1)
m.bimage$Var2 <- as.character(m.bimage$Var2)
bimage
##           M         F
## M 0.3542334 0.2786491
## F 0.2786491 0.2689833
ggplot(m.bimage, aes(Var1, Var2)) + 
  geom_tile(aes(fill = value),colour = "white") + 
  scale_fill_gradient(low = "white",high = "red") +
  labs(x="", y="")

# block model of strong contact network
b <- blockmodel(contact.strong.sna, eq, k=4)
gendergroup <- data.frame(name = b$plabels, block = b$block.membership)
gendergroup$name <- as.integer(as.character(gendergroup$name))
gendergroup$block <- as.integer(as.character(gendergroup$block))
gendergroup <- sqldf("SELECT MIN(name) AS id, block FROM gendergroup GROUP BY block")
gendergroup$id <- as.integer(gendergroup$id)
gendergroup$gender <- left_join(gendergroup, attribute, by = 'id')$gender

bimage <- b$block.model
colnames(bimage) <- gendergroup$gender
rownames(bimage) <- gendergroup$gender
bimage <- bimage[1:2, 1:2]
m.bimage <- melt(bimage)
m.bimage$Var1 <- as.character(m.bimage$Var1)
m.bimage$Var2 <- as.character(m.bimage$Var2)
bimage
##           M         F
## M 0.1261632 0.1027174
## F 0.1027174 0.1047297
ggplot(m.bimage, aes(Var1, Var2)) + 
  geom_tile(aes(fill = value),colour = "white") + 
  scale_fill_gradient(low = "white",high = "red") +
  labs(x="", y="")

Logistic Regression

# number of common friends matrix
common.friends <- matrix(0, 242, 242)
for (i in 1:242){
  for (j in 1:242){
    common.friends[i,j] = cocitation(contact_strong, i)[j]
  }
}
contact.strong.matrix <- as.matrix(get.adjacency(contact_strong))
log <- netlogit(contact.strong.matrix, common.friends,reps=100)
summary(log)
## 
## Network Logit Model
## 
## Coefficients:
##             Estimate   Exp(b)     Pr(<=b) Pr(>=b) Pr(>=|b|)
## (intercept) -4.8111309 0.00813865 1       0       1        
## x1           0.3900841 1.47710499 1       0       0        
## 
## Goodness of Fit Statistics:
## 
## Null deviance: 80851.46 on 58322 degrees of freedom
## Residual deviance: 14291.61 on 58320 degrees of freedom
## Chi-Squared test of fit improvement:
##   66559.85 on 2 degrees of freedom, p-value 0 
## AIC: 14295.61    BIC: 14313.55 
## Pseudo-R^2 Measures:
##  (Dn-Dr)/(Dn-Dr+dfn): 0.5329826 
##  (Dn-Dr)/Dn: 0.8232363 
## Contingency Table (predicted (rows) x actual (cols)):
## 
##          Actual
## Predicted       0       1
##         0   51152    1626
##         1     926    4618
## 
##  Total Fraction Correct: 0.9562429 
##  Fraction Predicted 1s Correct: 0.8329726 
##  Fraction Predicted 0s Correct: 0.9691917 
##  False Negative Rate: 0.26041 
##  False Positive Rate: 0.01778102 
## 
## Test Diagnostics:
## 
##  Null Hypothesis: qap 
##  Replications: 100 
##  Distribution Summary:
## 
##        (intercept)        x1
## Min      -143.7025   -3.6334
## 1stQ     -143.5779   -0.6945
## Median   -143.4592    0.4006
## Mean     -143.4637    0.4265
## 3rdQ     -143.3614    1.3510
## Max      -143.0888    5.1278
common <- as.vector(common.friends)
relation <- as.vector(contact.strong.matrix)
b0 <- log$coefficients[1]
b1 <- log$coefficients[2]
probs <- rep(0,length(common))
for (i in 1:length(common)) {
  probs[i] <- (exp(b0+b1*common[i]))/(1+exp(b0+b1*common[i]))
}
ggplot(data.frame(common, relation, probs)) + 
  geom_point(aes(x = common, y = probs), size = 1, color = "salmon") + 
  geom_point(aes(x = common, y = relation), size = 1, color = "blue") +
  labs(x = "Number of Common Friends", y = "Probability")